Experiment & contact info
PIs: Rosalie Sears (searsr@ohsu.edu), Laura Heiser (heiserl@ohsu.edu)
Sample preparation: Zinab Doha (dohaz@ohsu.edu)
Library prep from single cells: Xi Li & Colin Daniel (danielc@ohsu.edu)
Analysis: Nick Calistri (calistri@ohsu.edu)
Sequencing performed by OHSU MPSSR
Analysis design
load rliger integrated data (s2_vehicle_rliger.rds)
Optimize leiden clustering for maximized silhouette width
Visualize cluster versus lineage markers
Manually assign lineage
Run 3D UMAP and visualize clusters + lineage in 3d
Find DEGs (upregulated) for each cluster compared to all other clusters
Save updated so_merge in updated .rds file
Set up
Load libraries
library(Matrix)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x tidyr::unpack() masks Matrix::unpack()
library(Seurat)
## Warning: package 'Seurat' was built under R version 4.1.2
## Registered S3 method overwritten by 'spatstat.geom':
## method from
## print.boxx cli
## Attaching SeuratObject
library(cluster)
load s2_vehicle_integrated.rds and set seed
so_merge <- readRDS('analysis_files/s2_vehicle_integrated.rds')
so_merge@meta.data$cell_barcode <- rownames(so_merge@meta.data)
set.seed(3)
Prepare silhoutte scoring function
library(bluster)
cluster_sweep <- function(resolution = seq(from = 0.3, to = 0.8, by = 0.1), seurat_object, embedding, ndims = ncol(emedding)){
for(i in resolution){
seurat_object <- FindClusters(seurat_object,
resolution = i,
algorithm = 4)
p1 <- DimPlot(seurat_object,
group.by = 'seurat_clusters',
label = TRUE)+
coord_equal()
print(p1)
curr_clusters <- seurat_object@meta.data$seurat_clusters
sil <- approxSilhouette(x = embedding,
clusters = curr_clusters)
boxplot(split(sil$width, curr_clusters),
main = paste0('Resolution: ', i, '\n Mean sil.width: ', mean(sil$width)))
best.choice <- ifelse(sil$width > 0,
curr_clusters,
sil$other)
table(Assigned=curr_clusters, Closest=best.choice)
#
rmsd <- clusterRMSD(embedding, curr_clusters)
barplot(rmsd,
main = paste0('Resolution: ', i, '\n Mean rmsd: ', mean(rmsd)))
if(i == resolution[1]){
qc <- tibble(res = i,
n_clusters = length(unique(sil$cluster)),
mean_sil_width = mean(sil$width),
mean_rmsd = mean(rmsd))
}else{
qc <- rbind(qc,
tibble(res = i,
n_clusters = length(unique(sil$cluster)),
mean_sil_width = mean(sil$width),
mean_rmsd = mean(rmsd)))
}
}
return(qc)
}
Cluster optimization (by silhouette width)
reduction_to_use <- 'iNMF'
ndims <- 50
###
ElbowPlot(so_merge, ndims = 50, reduction = 'harmony')
so_merge <- FindNeighbors(so_merge,
reduction = reduction_to_use,
dims = 1:ndims)
## Computing nearest neighbor graph
## Computing SNN
so_merge <- RunUMAP(so_merge,
reduction = reduction_to_use,
dims = 1:ndims)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 09:25:38 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:25:38 Read 14042 rows and found 50 numeric columns
## 09:25:38 Using Annoy for neighbor search, n_neighbors = 30
## 09:25:38 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:25:41 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\RtmpwBA9Fz\file183026a1557a
## 09:25:41 Searching Annoy index using 1 thread, search_k = 3000
## 09:25:45 Annoy recall = 100%
## 09:25:45 Commencing smooth kNN distance calibration using 1 thread
## 09:25:45 Initializing from normalized Laplacian + noise
## 09:25:46 Commencing optimization for 200 epochs, with 635162 positive edges
## 09:25:58 Optimization finished
cluster_qc <- cluster_sweep(seurat_object = so_merge,
embedding = so_merge@reductions$iNMF@cell.embeddings,
resolution = seq(from = 0.1, to = 1.5, by = 0.05))
cluster_qc <- cluster_qc %>%
gather(- c(res), value = 'value', key = 'metric')
ggplot(cluster_qc, aes(x = res, y = value, color = metric))+
geom_point()+
geom_line()+
theme_bw()+
facet_wrap(~metric, ncol = 3, scales = 'free_y')+
scale_x_continuous(breaks = seq(from = 0, to = 1.5, by = 0.25))+
xlab('Leiden resolution')+
geom_vline(xintercept = 0.45, linetype = 'dashed')
Supplementary figure for clustering optimization
# 1080 x 240
ggplot(cluster_qc, aes(x = res, y = value, color = metric))+
geom_point()+
geom_line()+
theme_bw()+
facet_wrap(~metric, ncol = 3, scales = 'free_y')+
scale_x_continuous(breaks = seq(from = 0, to = 1.5, by = 0.25))+
xlab('Leiden resolution')+
geom_vline(xintercept = 0.45, linetype = 'dashed')
Cluster based on optimal resolution
so_merge <- FindClusters(so_merge,
algorithm = 4,
resolution = 0.45)
DimPlot(so_merge, label = TRUE, label.size = 8)+
coord_equal()+
theme(legend.position = 'none')
ggplot(so_merge@meta.data, aes(x = seurat_clusters, fill = tumor))+
geom_bar()+
theme_bw()+
facet_wrap(~tumor)
ggplot(so_merge@meta.data, aes(x = seurat_clusters, fill = histology))+
geom_bar()+
theme_bw()+
facet_wrap(~histology, ncol = 1)
Visualize lineage markers by cluster
DotPlot(so_merge,
features = c('Ptprc', 'Cd74', 'Epcam', 'Cd14', 'Cd3e', 'Pdgfra', 'Krt5', 'Pecam1', 'Mki67', 'Pdgfrb'),
cols = 'RdYlBu')+
theme_bw()+
coord_flip()
Cluster 14 lineage assignment
Subset cluster 14
c14 <- subset(so_merge,
subset = seurat_clusters == 14)
Harmony is used because the underlying PCA will separate celltype in sparser latent space than iNMF
Harmony + UMAP
library(harmony)
## Loading required package: Rcpp
c14 <- FindVariableFeatures(c14)
c14 <- NormalizeData(c14)
c14 <- ScaleData(c14)
## Centering and scaling data matrix
c14 <- RunPCA(c14)
## PC_ 1
## Positive: Tmsb10, Trbc2, Sept1, Cd3g, Ablim1, Cd3d, Ptprcap, Trac, Skap1, S100a10
## Il2rb, Ctsw, Ctla2a, Pdcd4, Ltb, Lck, S100a6, Rora, Thy1, Nkg7
## Cd226, Cxcr6, Dut, Sh2d2a, Trbc1, Ikzf3, Il18r1, Itgae, Inpp4b, Ahnak
## Negative: C1qc, Ctss, C1qa, C1qb, Mpeg1, Cx3cr1, Apoe, Csf1r, Aif1, Lgmn
## Cd74, Ctsc, Ms4a7, Cxcl16, Cst3, Ctsz, Hexb, Gatm, Cybb, Tyrobp
## Tgfbi, Lyz2, Cd68, Ctsb, Pld4, Psap, Ftl1, H2-Ab1, Fcer1g, Man2b1
## PC_ 2
## Positive: Lcn2, Wfdc18, Csn3, Epcam, Nfib, Fxyd3, Trf, Cldn7, Cldn3, Igfbp5
## Cald1, Krt8, Dbi, Krt18, Aqp5, Sox9, Plet1, Sox4, Phlda1, Nfix
## Nedd4, Scgb2b27, Auts2, Apoc1, Tpm1, Cd9, Fermt2, Chil1, Ptprf, Cd24a
## Negative: AW112010, Srgn, Ltb, Crip1, Ptprcap, Ctsw, Vim, Cd3g, Ablim1, Sept1
## Il2rb, Nkg7, S100a4, Trbc2, Gimap4, Skap1, Ifngr1, Rgs1, Itgae, S100a10
## Cd226, Inpp4b, Bcl2, Lck, Esyt1, H2-Q7, Ptpn22, Cd3d, Atad2, Gzmb
## PC_ 3
## Positive: AY036118, Igkc, Maf, Il7r, Gm26917, Ccnb1ip1, Icos, Apoe, Cxcr6, Cd6
## Rora, Lst1, Cd74, Il2ra, Ms4a4b, Rmrp, Lgals1, Hbb-bs, Trac, Hba-a1
## Ly6a, Ccr2, Cd3d, Cd4, Il18r1, Hbb-bt, Ttc39c, H2-Aa, Ifi27l2a, Thy1
## Negative: Cemip2, Gzmb, Car2, Xcl1, Hsp90ab1, Rps2, Klra5, Ier5l, Ctla2b, Junb
## Rps18, Cdv3, Npm1, Hspd1, Fasl, Nme2, Nme1, Eif5a, Errfi1, Rpl12
## Sult2b1, Cebpb, Eef1b2, Nr4a2, Rps3, Eef1g, Klrk1, Itih5, Sfr1, Cited4
## PC_ 4
## Positive: Lgals1, Cavin3, Maf, S100a4, S100a10, Rhobtb3, Tmem64, Serpinb1a, Palld, Cav1
## Trdc, Cd163l1, Il7r, Rora, Ptpn14, Col6a1, Ramp1, Capg, Ppic, Phlda3
## Fat1, Igfbp3, Trdv4, Cx3cl1, Tpm2, Timp1, Krt15, Cd3d, Fkbp9, Ikzf2
## Negative: Xcl1, Scgb2b27, Klrd1, Nr4a2, Scgb1b27, Cemip2, Gzmb, Ncr1, Serpinb9, Klra5
## Car6, Ctla2a, Etv1, Wfdc18, Gzma, Vps37b, Serpinb6b, Chn2, Errfi1, Aqp5
## Neurl3, Ppp1r3b, Cd7, Klre1, Nkg7, Hba-a1, Hbb-bs, Gzmc, Ctla2b, Lcn2
## PC_ 5
## Positive: Krt15, Dsc3, Krt5, Krt14, Sparc, Fat1, Fkbp9, Vcan, Igsf10, Ppic
## Tagln, Slpi, Trp63, Cav1, Acta2, Postn, S100a16, Serpine2, A430105J06Rik, Igfbp3
## Hmcn1, Cxcl14, Phlda3, Sfrp1, Brinp3, Col4a1, Tpm2, Palld, Col6a1, Gas1
## Negative: Nme1, Nme2, Chchd10, Kcnk1, Rplp0, Rps3, Elf5, Atp2c2, 4931406C07Rik, Cldn7
## Cystm1, Kcnn4, Eef1b2, Ppp1r2, Wfdc18, Npm1, Prss8, Rpl12, 1110008P14Rik, Erh
## Nhp2, Ass1, Epcam, Rpsa, Smim22, Cd24a, Plet1, Rab25, Cldn3, Ehf
c14 <- RunHarmony(c14,
group.by.vars = 'library_id')
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony 6/10
## Harmony 7/10
## Harmony converged after 7 iterations
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details
## on syntax validity
ElbowPlot(c14)
c14 <- RunUMAP(c14, reduction = 'harmony', dims = 1:5,
seed.use = 500)
## 09:45:05 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:45:05 Read 340 rows and found 5 numeric columns
## 09:45:05 Using Annoy for neighbor search, n_neighbors = 30
## 09:45:05 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:45:05 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\RtmpwBA9Fz\file18307e5632a1
## 09:45:05 Searching Annoy index using 1 thread, search_k = 3000
## 09:45:05 Annoy recall = 100%
## 09:45:06 Commencing smooth kNN distance calibration using 1 thread
## 09:45:06 Initializing from normalized Laplacian + noise
## 09:45:06 Commencing optimization for 500 epochs, with 11592 positive edges
## 09:45:07 Optimization finished
DimPlot(c14, label = TRUE)+
coord_equal()
FeaturePlot(c14,
features = c('Epcam', 'Cd3e', 'Cd14', 'Pdgfra'))
Subcluster
c14 <- FindNeighbors(c14,
reduction = 'harmony',
dims = 1:5)
## Computing nearest neighbor graph
## Computing SNN
cluster_qc <- cluster_sweep(seurat_object = c14,
embedding = c14@reductions$harmony@cell.embeddings[,1:5],
resolution = seq(from = 0.1, to = 1.5, by = 0.1))
cluster_qc <- cluster_qc %>%
gather(c(-res, -n_clusters), value = 'value', key = 'metric')
ggplot(cluster_qc, aes(x = res, y = value, color = metric))+
geom_point(aes(size = n_clusters))+
geom_line()+
theme_bw()+
facet_wrap(~metric, ncol = 1, scales = 'free_y')
c14 <- FindClusters(c14,
resolution = 0.1,
algorithm = 4)
DotPlot(c14,
features = c('Cd14', 'Cd3e', 'Epcam', 'Pdgfra', 'Pecam1'),
cols = 'RdYlBu')+
coord_flip()+
RotatedAxis()
DimPlot(c14, label = TRUE, label.size = 8)+
coord_equal()+
theme(legend.position = 'none')
FeaturePlot(c14, features = c('Cd3e', 'Cd14', 'Cd74', 'Epcam'))
DotPlot(c14,
features = c('Ptprc', 'Cd74', 'Epcam', 'Cd14', 'Cd3e', 'Pdgfra', 'Krt5', 'Pecam1', 'Mki67', 'Pdgfrb'),
cols = 'RdYlBu')+
theme_bw()+
coord_flip()
c14_subcluster_dict <- tibble(cell_barcode = rownames(c14@meta.data),
cluster_l2 = 14 + as.numeric(c14@meta.data$seurat_clusters)/10)
Rejoin subclustered with the rest of data
# Add c14 subcluster info back to main seurat object
so_merge@meta.data$cell_barcode <- rownames(so_merge@meta.data)
so_merge@meta.data <- full_join(x = so_merge@meta.data,
y = c14_subcluster_dict,
by = 'cell_barcode',
suffix = c('', ''))
rownames(so_merge@meta.data) <- so_merge@meta.data$cell_barcode
# Fill in cluster_l2 as seurat_clusters for each cluster without subclusters
so_merge@meta.data$cluster_l2[is.na(so_merge@meta.data$cluster_l2)] <- so_merge@meta.data$seurat_clusters[is.na(so_merge@meta.data$cluster_l2)]
Idents(so_merge) <- 'cluster_l2'
Assign lineage
Lineage is biological interpretation from major markers.
DotPlot(so_merge,
features = c('Ptprc', 'Cd74', 'Epcam', 'Cd14', 'Cd3e', 'Pdgfra', 'Krt5', 'Pecam1', 'Mki67', 'Pdgfrb'),
cols = 'RdYlBu')+
theme_bw()+
coord_flip()
epi_clusters <- c(2, 8, 10, 19, 14.3)
lymphoid_clusters <- c(5, 6, 9, 13, 14.1, 21, 22)
myeloid_clusters <- c(1, 3, 4, 14.2, 15, 16, 18, 20, 24)
fibroblast_clusters <- c(7, 12, 17)
endothelial_clusters <- c(11)
perivascular_clusters <- c(23)
lineage_dict <- tibble(lineage = c(rep('epithelial', length(epi_clusters)),
rep('lymphoid', length(lymphoid_clusters)),
rep('myeloid', length(myeloid_clusters)),
rep('fibroblast', length(fibroblast_clusters)),
rep('endothelial', length(endothelial_clusters)),
rep('perivascular', length(perivascular_clusters))),
cluster_id = c(epi_clusters, lymphoid_clusters, myeloid_clusters, fibroblast_clusters,
endothelial_clusters, perivascular_clusters))
so_merge[['lineage']] <- plyr::mapvalues(x = so_merge@meta.data$cluster_l2,
from = lineage_dict$cluster_id,
to = lineage_dict$lineage)
# Make sure cluster_l2 and lineage are factors for Seurat plotting compatibility
so_merge@meta.data$lineage <- as.factor(so_merge@meta.data$lineage)
so_merge@meta.data$cluster_l2 <- as.factor(so_merge@meta.data$cluster_l2)
DotPlot(so_merge,
features = c( 'Pecam1', 'Epcam', 'Krt5', 'Pdgfra', 'Ptprc', 'Cd3e', 'Cd14', 'Cd74', 'Pdgfrb'),
cols = 'RdYlBu',
group.by = 'lineage')+
coord_flip()+
RotatedAxis()
Cluster relationship investigation
Lineage vs nmf component score
# Average iNMF embeddings to show relationship to lineage
cluster_embedding <- tibble(lineage = so_merge@meta.data$lineage) %>%
cbind(so_merge@reductions$iNMF@cell.embeddings) %>%
group_by(lineage) %>%
summarize(across(everything(), list(mean)))
pheatmap::pheatmap(data.frame(cluster_embedding[,2:ncol(cluster_embedding)],
row.names = cluster_embedding$lineage),
scale = 'column',
main = 'Average iNMF embedding per cluster \n column z-scored')
# Average iNMF embedding to show relationship to cluster, annotated by lineage
cluster_embedding <- tibble(cluster_l2 = so_merge@meta.data$cluster_l2) %>%
cbind(so_merge@reductions$iNMF@cell.embeddings) %>%
group_by(cluster_l2) %>%
summarize(across(everything(), list(mean)))
annotation_rows <- data.frame(lineage = lineage_dict$lineage,
row.names = lineage_dict$cluster_id)
pheatmap::pheatmap(data.frame(cluster_embedding[,2:ncol(cluster_embedding)],
row.names = cluster_embedding$cluster_l2),
scale = 'row',
main = 'Average iNMF embedding per cluster \n row z-scored',
annotation_row = annotation_rows,
cutree_rows = 7)
pheatmap::pheatmap(data.frame(cluster_embedding[,2:ncol(cluster_embedding)],
row.names = cluster_embedding$cluster_l2),
scale = 'column',
main = 'Average iNMF embedding per cluster \n column z-scored',
annotation_row = annotation_rows,
cutree_rows = 6)
DimPlot(so_merge,
label = TRUE,
label.size = 6,
group.by = 'lineage')+
coord_equal()
Correlation between clusters
code adapted from: https://github.com/satijalab/seurat/issues/1552
Idents(so_merge) <- 'cluster_l2'
av.exp <- AverageExpression(so_merge)$RNA[VariableFeatures(so_merge),]
cor.exp <- as.data.frame(cor(av.exp, method = 'pearson'))
pheatmap::pheatmap(cor.exp,
annotation_row = annotation_rows)
DimPlot(so_merge,
label = TRUE)+
coord_equal()
Celltype fractions
Visualize celltype fractions per tumor
so_merge_meta <- so_merge@meta.data %>%
mutate(tumor_pheno = paste0(tumor, '-', histology))
freq_table <- so_merge_meta %>%
dplyr::select(lineage, tumor_pheno) %>%
table()
col_anno <- data.frame(histology = str_split(colnames(freq_table), pattern = '-', simplify = TRUE)[,2],
log10_n_cells = log10(colSums(freq_table)),
row.names = colnames(freq_table))
pheatmap::pheatmap(prop.table(freq_table, margin = 2),
display_numbers = freq_table,
main = 'celltype fraction \n Number = n_cells per celltype per tumor',
annotation_col = col_anno)
Assign celltype_frac tumor type
- SP = stromal poor
- SR_IP = stromal-rich immune poor
- SR_IR = stromal-rich immune rich
sp <- c('V8_T1', 'V3_T2', 'V4', 'V3_T1')
srip <- c('V6_T1', 'V6_T3')
srir <- c('V7_T3', 'V7_T1', 'V7_T2', 'V5', 'V6_T2')
cellfrac_assignments <- data.frame(rbind(cbind('SP', sp),
cbind('SR_IP', srip),
cbind('SR_IR', srir)))
colnames(cellfrac_assignments) <- c('cellfrac_type', 'tumor')
so_merge@meta.data$cellfrac_type <- plyr::mapvalues(x = so_merge@meta.data$tumor,
from = cellfrac_assignments$tumor,
to = cellfrac_assignments$cellfrac_type)
# Visualize results
table(so_merge@meta.data$cellfrac_type) %>%
knitr::kable()
| Var1 | Freq |
|---|---|
| SP | 3346 |
| SR_IP | 1278 |
| SR_IR | 9418 |
DimPlot(so_merge,
group.by = 'cellfrac_type')+
facet_wrap(~cellfrac_type)
so_meta <- so_merge@meta.data
ggplot(so_meta, aes(x = cluster_l2, fill = cellfrac_type))+
geom_bar()+
theme_bw()+
facet_grid(cellfrac_type~lineage, scales = 'free', space = 'free_x')+
RotatedAxis()
3d umap
so_merge <- RunUMAP(so_merge,
reduction = 'iNMF',
dims = 1:50,
n.components = 3L,
reduction.name = 'umap3d',
reduction.key = 'umap3d_')
## 09:45:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 09:45:27 Read 14042 rows and found 50 numeric columns
## 09:45:27 Using Annoy for neighbor search, n_neighbors = 30
## 09:45:27 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 09:45:30 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\RtmpwBA9Fz\file1830501a1548
## 09:45:30 Searching Annoy index using 1 thread, search_k = 3000
## 09:45:33 Annoy recall = 100%
## 09:45:33 Commencing smooth kNN distance calibration using 1 thread
## 09:45:34 Initializing from normalized Laplacian + noise
## 09:45:35 Commencing optimization for 200 epochs, with 635162 positive edges
## 09:45:47 Optimization finished
so_meta <- so_merge@meta.data %>%
mutate(umap3d_1 = so_merge@reductions$umap3d@cell.embeddings[,1]) %>%
mutate(umap3d_2 = so_merge@reductions$umap3d@cell.embeddings[,2]) %>%
mutate(umap3d_3 = so_merge@reductions$umap3d@cell.embeddings[,3])
plotly::plot_ly(so_meta,
x = ~umap3d_1,
y = ~umap3d_2,
z = ~umap3d_3,
color = ~seurat_clusters)
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
plotly::plot_ly(so_meta,
x = ~umap3d_1,
y = ~umap3d_2,
z = ~umap3d_3,
color = ~lineage)
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Find DEG & enriched ontologies for each cluster overall
load gene enrichment libraries
library(clusterProfiler)
##
## clusterProfiler v4.0.5 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141. doi: 10.1016/j.xinn.2021.100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:purrr':
##
## simplify
## The following object is masked from 'package:stats':
##
## filter
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.2
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:Matrix':
##
## expand, unname
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
##
## slice
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## The following object is masked from 'package:grDevices':
##
## windows
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
Find DEG
Idents(so_merge) <- 'cluster_l2'
markers <- FindAllMarkers(so_merge)
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14.1
## Calculating cluster 14.2
## Calculating cluster 14.3
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 17
## Calculating cluster 18
## Calculating cluster 19
## Calculating cluster 20
## Calculating cluster 21
## Calculating cluster 22
## Calculating cluster 23
## Calculating cluster 24
top_markers <- markers %>%
group_by(cluster) %>%
arrange(desc(avg_log2FC)) %>%
slice_head(n = 5)
# Scale data for all genes for heatmap purposes
so_merge <- ScaleData(so_merge,
features = rownames(so_merge))
## Centering and scaling data matrix
DoHeatmap(so_merge, features = top_markers$gene)
write_csv(markers, 'analysis_files/s3_cluster_l2_markers.csv')
Save DEG for each lineage that has more than one cluster
for(i in unique(so_merge@meta.data$lineage)){
lineage_clusters <- so_merge@meta.data %>%
filter(lineage == i) %>%
pull(cluster_l2) %>%
unique() %>%
droplevels()
if(length(lineage_clusters) > 1){
lineage_markers <-markers %>%
filter(cluster %in% lineage_clusters)
write_csv(lineage_markers, paste0('analysis_files/s3_', i, '_markers.csv'))
}
}
save rds output
saveRDS(so_merge, file = 'analysis_files/s3_celltypes.rds')
sessionInfo()
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] org.Mm.eg.db_3.13.0 AnnotationDbi_1.56.2 IRanges_2.28.0
## [4] S4Vectors_0.32.3 Biobase_2.54.0 BiocGenerics_0.40.0
## [7] clusterProfiler_4.0.5 harmony_0.1.0 Rcpp_1.0.7
## [10] bluster_1.2.1 cluster_2.1.2 SeuratObject_4.0.4
## [13] Seurat_4.1.0 forcats_0.5.1 stringr_1.4.0
## [16] dplyr_1.0.8 purrr_0.3.4 readr_2.1.2
## [19] tidyr_1.2.0 tibble_3.1.6 ggplot2_3.3.5
## [22] tidyverse_1.3.1 Matrix_1.3-4
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 reticulate_1.24 tidyselect_1.1.2
## [4] RSQLite_2.2.9 htmlwidgets_1.5.4 grid_4.1.1
## [7] BiocParallel_1.28.3 Rtsne_0.15 scatterpie_0.1.7
## [10] munsell_0.5.0 codetools_0.2-18 ica_1.0-2
## [13] future_1.24.0 miniUI_0.1.1.1 withr_2.5.0
## [16] spatstat.random_2.1-0 colorspace_2.0-3 GOSemSim_2.18.1
## [19] highr_0.9 knitr_1.37 rstudioapi_0.13
## [22] ROCR_1.0-11 tensor_1.5 DOSE_3.18.3
## [25] listenv_0.8.0 labeling_0.4.2 GenomeInfoDbData_1.2.7
## [28] polyclip_1.10-0 bit64_4.0.5 farver_2.1.0
## [31] pheatmap_1.0.12 downloader_0.4 treeio_1.16.2
## [34] parallelly_1.30.0 vctrs_0.3.8 generics_0.1.2
## [37] xfun_0.29 R6_2.5.1 GenomeInfoDb_1.30.1
## [40] graphlayouts_0.7.1 rmdformats_1.0.3 gridGraphics_0.5-1
## [43] fgsea_1.18.0 bitops_1.0-7 spatstat.utils_2.3-0
## [46] cachem_1.0.6 assertthat_0.2.1 vroom_1.5.7
## [49] promises_1.2.0.1 scales_1.1.1 ggraph_2.0.5
## [52] enrichplot_1.12.3 gtable_0.3.0 globals_0.14.0
## [55] goftest_1.2-3 tidygraph_1.2.0 rlang_1.0.1
## [58] splines_4.1.1 lazyeval_0.2.2 spatstat.geom_2.3-2
## [61] broom_0.7.12 yaml_2.3.5 reshape2_1.4.4
## [64] abind_1.4-5 modelr_0.1.8 crosstalk_1.2.0
## [67] backports_1.3.0 httpuv_1.6.5 qvalue_2.24.0
## [70] tools_4.1.1 bookdown_0.24 ggplotify_0.1.0
## [73] ellipsis_0.3.2 spatstat.core_2.4-0 jquerylib_0.1.4
## [76] RColorBrewer_1.1-2 ggridges_0.5.3 plyr_1.8.6
## [79] zlibbioc_1.40.0 RCurl_1.98-1.5 rpart_4.1-15
## [82] deldir_1.0-6 viridis_0.6.2 pbapply_1.5-0
## [85] cowplot_1.1.1 zoo_1.8-9 haven_2.4.3
## [88] ggrepel_0.9.1 fs_1.5.2 magrittr_2.0.1
## [91] data.table_1.14.2 RSpectra_0.16-0 scattermore_0.8
## [94] DO.db_2.9 lmtest_0.9-39 reprex_2.0.1
## [97] RANN_2.6.1 fitdistrplus_1.1-6 matrixStats_0.61.0
## [100] hms_1.1.1 patchwork_1.1.1 mime_0.12
## [103] evaluate_0.15 xtable_1.8-4 readxl_1.3.1
## [106] gridExtra_2.3 compiler_4.1.1 shadowtext_0.1.1
## [109] KernSmooth_2.23-20 crayon_1.5.0 htmltools_0.5.2
## [112] ggfun_0.0.5 mgcv_1.8-36 later_1.3.0
## [115] tzdb_0.2.0 aplot_0.1.2 lubridate_1.8.0
## [118] DBI_1.1.2 tweenr_1.0.2 dbplyr_2.1.1
## [121] MASS_7.3-54 cli_3.1.1 parallel_4.1.1
## [124] igraph_1.2.7 pkgconfig_2.0.3 plotly_4.10.0
## [127] spatstat.sparse_2.1-0 xml2_1.3.3 ggtree_3.0.4
## [130] bslib_0.3.1 XVector_0.34.0 rvest_1.0.2
## [133] yulab.utils_0.0.4 digest_0.6.27 sctransform_0.3.3
## [136] RcppAnnoy_0.0.19 spatstat.data_2.1-2 Biostrings_2.62.0
## [139] fastmatch_1.1-3 rmarkdown_2.11 cellranger_1.1.0
## [142] leiden_0.3.9 tidytree_0.3.8 uwot_0.1.11
## [145] shiny_1.7.1 lifecycle_1.0.1 nlme_3.1-152
## [148] jsonlite_1.7.2 BiocNeighbors_1.10.0 limma_3.48.3
## [151] viridisLite_0.4.0 fansi_1.0.2 pillar_1.7.0
## [154] lattice_0.20-44 KEGGREST_1.34.0 fastmap_1.1.0
## [157] httr_1.4.2 survival_3.2-11 GO.db_3.13.0
## [160] glue_1.6.1 png_0.1-7 bit_4.0.4
## [163] ggforce_0.3.3 stringi_1.7.6 sass_0.4.0
## [166] blob_1.2.2 memoise_2.0.1 ape_5.5
## [169] irlba_2.3.5 future.apply_1.8.1